library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ─────────────────────────────────────────────── tidyverse 1.3.0 ──
✓ ggplot2 3.3.2 ✓ purrr 0.3.4
✓ tibble 3.0.4 ✓ dplyr 1.0.2
✓ tidyr 1.1.2 ✓ stringr 1.4.0
✓ readr 1.4.0 ✓ forcats 0.5.0
package ‘ggplot2’ was built under R version 3.6.2package ‘tibble’ was built under R version 3.6.2package ‘tidyr’ was built under R version 3.6.2package ‘readr’ was built under R version 3.6.2package ‘purrr’ was built under R version 3.6.2package ‘dplyr’ was built under R version 3.6.2── Conflicts ────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(janitor)
package ‘janitor’ was built under R version 3.6.2
Attaching package: ‘janitor’
The following objects are masked from ‘package:stats’:
chisq.test, fisher.test
library(sf)
package ‘sf’ was built under R version 3.6.2Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
alc <- read_csv("~/Downloads/clean_alcohol_codes.csv")
── Column specification ────────────────────────────────────────────────────────────────
cols(
feature_code = col_character(),
date_code = col_character(),
value_per_one_hundred_thousand_people = col_double(),
alcohol_condition = col_character(),
type_of_hospital = col_character()
)
health_board <- st_read("../raw_data/SG_NHS_HealthBoards_2019/SG_NHS_HealthBoards_2019.shp", quiet = TRUE)
head(health_board)
Simple feature collection with 6 features and 4 fields
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: 83941 ymin: 530250.8 xmax: 414219.1 ymax: 979400.7
CRS: 27700
HBCode HBName Shape_Leng Shape_Area geometry
1 S08000015 Ayrshire and Arran 679782.3 3408802229 MULTIPOLYGON (((201916.2 60...
2 S08000016 Borders 525406.7 4742684960 MULTIPOLYGON (((345325.9 57...
3 S08000017 Dumfries and Galloway 830301.2 6676314851 MULTIPOLYGON (((266004.4 54...
4 S08000019 Forth Valley 430568.3 2733658721 MULTIPOLYGON (((256533 7405...
5 S08000020 Grampian 802844.3 8800885268 MULTIPOLYGON (((383521.2 77...
6 S08000022 Highland 8486537.7 33637044520 MULTIPOLYGON (((173254 6049...
health_board_simply <- st_simplify(health_board, dTolerance = 2000)
head(health_board_simply)
Simple feature collection with 6 features and 4 fields
geometry type: GEOMETRY
dimension: XY
bbox: xmin: 91962.5 ymin: 530635.8 xmax: 413825.1 ymax: 976907.5
CRS: 27700
HBCode HBName Shape_Leng Shape_Area geometry
1 S08000015 Ayrshire and Arran 679782.3 3408802229 MULTIPOLYGON (((225054.5 66...
2 S08000016 Borders 525406.7 4742684960 POLYGON ((345325.9 579663.3...
3 S08000017 Dumfries and Galloway 830301.2 6676314851 POLYGON ((275243.5 620000.1...
4 S08000019 Forth Valley 430568.3 2733658721 POLYGON ((256533 740502, 25...
5 S08000020 Grampian 802844.3 8800885268 POLYGON ((320000.1 871554.8...
6 S08000022 Highland 8486537.7 33637044520 MULTIPOLYGON (((171127.7 77...
plot(health_board_simply)
both_data <- full_join(alc, health_board_simply, by = c("feature_code" = "HBCode"))
both_health_data <- both_data %>%
filter(feature_code != "S92000003")
both_health_data
both_health_data %>%
filter(date_code == "2013/2014") %>%
ggplot(aes(fill = value_per_one_hundred_thousand_people,
geometry = geometry)) +
geom_sf() +
theme_void() +
scale_fill_viridis_c(option = "plasma", trans = "sqrt") +
labs(
fill = "Stays per 100,000 population"
)
data_zone <- read_csv("~/Downloads/Datazone2011lookup.csv")
── Column specification ────────────────────────────────────────────────────────────────
cols(
.default = col_character(),
UR2_Code = col_double(),
UR3_Code = col_double(),
UR6_Code = col_double(),
UR8_Code = col_double()
)
ℹ Use `spec()` for the full column specifications.
names(data_zone)
[1] "DZ2011_Code" "DZ2011_Name" "IZ2011_Code" "IZ2011_Name" "MMWard_Code"
[6] "MMWard_Name" "LA_Code" "LA_Name" "SPC_Code" "SPC_Name"
[11] "UKPC_Code" "UKPC_Name" "HB_Code" "HB_Name" "HIA_Code"
[16] "HIA_Name" "SPD_Code" "SPD_Name" "SFRLSO_Code" "SFRLSO_Name"
[21] "SFRSDA_Code" "SFRSDA_Name" "RRP_Code" "RRP_Name" "LRP_Code"
[26] "LRP_Name" "TTWA2011_Code" "TTWA2011_Name" "UR2_Code" "UR2_Name"
[31] "UR3_Code" "UR3_Name" "UR6_Code" "UR6_Name" "UR8_Code"
[36] "UR8_Name" "Country_Code" "Country_Name"
new_hb <- data_zone %>%
select(LA_Code, LA_Name, HB_Code, HB_Name)
new_hb
alc_raw_data <- read_csv("../raw_data/alcohol_related_hospital_stats.csv") %>%
clean_names()
── Column specification ────────────────────────────────────────────────────────────────
cols(
FeatureCode = col_character(),
DateCode = col_character(),
Measurement = col_character(),
Units = col_character(),
Value = col_double(),
`Alcohol Condition` = col_character(),
`Alcohol Related Hospital Activity` = col_character(),
`Type Of Hospital` = col_character()
)
alc_data <- alc_raw_data %>%
filter(date_code %in% c("2016/2017","2011/2012","2012/2013", "2010/2011", "2015/2016", "2009/2010", "2014/2015", "2018/2019", "2017/2018", "2013/2014")) %>%
rename(year = date_code) %>%
mutate(measurement = str_to_lower(measurement)) %>%
mutate(units = str_to_lower(units)) %>%
rename(hospital_activity = alcohol_related_hospital_activity) %>%
mutate(hospital_activity = str_to_lower(hospital_activity)) %>%
mutate(type_of_hospital = str_to_lower(type_of_hospital)) %>%
mutate(year = factor(year, levels = c("2009/2010", "2010/2011", "2011/2012", "2012/2013", "2013/2014", "2014/2015", "2015/2016", "2016/2017", "2017/2018", "2018/2019")))
alc_data
alc_clean <- alc_data %>%
filter(measurement == "ratio") %>%
filter(hospital_activity == "stays")
alc_clean
both_alc_data <- inner_join(alc_clean, new_hb, by = c("feature_code" = "LA_Code"))
both_alc_data
super_puper_data <- distinct(both_alc_data)
super_puper_data
super_duper_data <- alc_data %>%
mutate(hb_assign = case_when(
feature_code == "S12000033" ~ "S08000020",
feature_code == "S12000034" ~ "S08000020",
feature_code == "S12000041" ~ "S08000030",
feature_code == "S12000035" ~ "S08000022",
feature_code == "S12000005" ~ "S08000019",
feature_code == "S12000006" ~ "S08000017",
feature_code == "S12000042" ~ "S08000030",
feature_code == "S12000008" ~ "S08000015",
feature_code == "S12000045" ~ "S08000031",
feature_code == "S12000010" ~ "S08000024",
feature_code == "S12000011" ~ "S08000031",
feature_code == "S12000036" ~ "S08000024",
feature_code == "S12000013" ~ "S08000028",
feature_code == "S12000014" ~ "S08000019",
feature_code == "S12000047" ~ "S08000029",
feature_code == "S12000049" ~ "S08000031",
feature_code == "S12000017" ~ "S08000022",
feature_code == "S12000018" ~ "S08000031",
feature_code == "S12000019" ~ "S08000024",
feature_code == "S12000020" ~ "S08000020",
feature_code == "S12000021" ~ "S08000015",
feature_code == "S12000050" ~ "S08000032",
feature_code == "S12000023" ~ "S08000025",
feature_code == "S12000048" ~ "S08000030",
feature_code == "S12000038" ~ "S08000031",
feature_code == "S12000026" ~ "S08000016",
feature_code == "S12000027" ~ "S08000026",
feature_code == "S12000028" ~ "S08000015",
feature_code == "S12000029" ~ "S08000032",
feature_code == "S12000030" ~ "S08000019",
feature_code == "S12000039" ~ "S08000031",
feature_code == "S12000040" ~ "S08000024"))
final_data <- super_duper_data %>%
filter(!is.na(hb_assign)) %>%
filter(hospital_activity == "stays") %>%
filter(alcohol_condition != "All alcohol conditions", alcohol_condition !=
"All mental & behavioural disorders due to use of alcohol (M&B)",
alcohol_condition != "All Alcoholic Liver Disease (ALD)") %>%
mutate(alcohol_condition = str_remove_all(alcohol_condition, "M\\&B\\ \\-\\ ")) %>%
mutate(alcohol_condition = str_remove_all(alcohol_condition, "ALD\\ \\-\\ ")) %>%
filter(type_of_hospital == "general acute hospital")
library(plotly)
package ‘plotly’ was built under R version 3.6.2Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
ggplotly(final_final_data %>%
filter(year == "2013/2014") %>%
filter(HBName == "Greater Glasgow and Clyde") %>%
filter(measurement == "ratio") %>%
ggplot() +
aes(x = reorder(alcohol_condition, value),
y = value,
fill = alcohol_condition) +
geom_col() +
theme_classic() +
labs(
title = "Reasons of longest stays",
x = "Condition",
y = "Stays per 100,000") +
theme(legend.position = "none") +
coord_flip() +
scale_y_continuous(breaks = c(0,300,600,900,1200,1500,1800,2100,2400,2700,
3000, 3300)) +
theme(text = element_text(size=13)))
both_health_data %>%
filter(date_code == "2013/2014") %>%
mutate(show_edge = feature_code == "S08000031") %>%
filter(type_of_hospital == "General Acute Hospital") %>%
ggplot(aes(fill = value_per_one_hundred_thousand_people,
geometry = geometry,
colour = show_edge)) +
geom_sf() +
theme_void() +
labs(
fill = "Stays per 100,000 population"
) +
scale_color_manual(values=c("blue4", "yellow"), guide = "none") +
theme(
legend.title = element_text(color = "black", size = 14),
legend.text = element_text(color = "black", size = 10),
legend.position = "bottom"
) +
scale_fill_viridis_c(option = "D", trans = "sqrt")
final_final_data <- full_join(final_data, health_board_simply, by = c("hb_assign" = "HBCode")) %>%
filter(type_of_hospital == "general acute hospital")
final_final_data
write_csv(count_data, "clean_count_map.csv")
final_count_data %>%
filter(year == "2018/2019") %>%
mutate(show_edge = HBName == "Greater Glasgow and Clyde") %>%
ggplot(aes(fill = total, geometry = geometry, colour = show_edge)) +
geom_sf() +
scale_fill_viridis_c(option = "D", trans = "sqrt") +
scale_color_manual(values=c("blue4", "yellow"), guide = "none") +
theme_void() +
labs(
fill = "Total stays"
) +
theme(
legend.title = element_text(color = "black", size = 14),
legend.text = element_text(color = "black", size = 10),
legend.position = "right"
)
#Write csv separately and join with geo file in global.
write_csv(final_data, "alcohol_clean_for_map_data.csv")